In [1]:
import numpy as np
import pandas as pd
import pystan
import seaborn as sns
import matplotlib.pyplot as plt
import arviz as az
from src.stan_data_format import format_stan_data_logistic
from src.utils import pickle_model, load_model, make_latex_table, save_latex_table
from src.clean import clean_exp_2, clean_exp_1, clean_exp3
from multiprocessing import cpu_count

import os
print(pystan.__version__)

%matplotlib inline
2.19.1.1
In [2]:
import os
os.environ['STAN_NUM_THREADS'] = str(cpu_count())
os.environ['NUMEXPR_NUM_THREADS'] = str(cpu_count())
In [3]:
from IPython.display import HTML
HTML('''<script>
code_show_err=false; 
function code_toggle_err() {
 if (code_show_err){
 $('div.output_stderr').hide();
 } else {
 $('div.output_stderr').show();
 }
 code_show_err = !code_show_err
} 
$( document ).ready(code_toggle_err);
</script>
To toggle on/off output_stderr, click <a href="javascript:code_toggle_err()">here</a>.''')
Out[3]:
To toggle on/off output_stderr, click here.

Preprocessing

In [4]:
# File locations
EXPERIMENT_1 = './dat/raw/Experiment1.csv'
EXPERIMENT_2 = './dat/raw/Experiment2.csv'
EXPERIMENT_3 = './dat/raw/Experiment3.csv'

# Output locations
FIGURE_OUTPUT = './out/figures/'
CHAIN_OUTPUT = './out/chains/'
MODEL_OUTPUT = './out/models/'


TABLE_OUTPUT = './out/tables/'
CLEAN_DATA_EXP1 = './dat/cleaned/clean_exp1.csv'
CLEAN_DATA_EXP2 = './dat/cleaned/clean_exp2.csv'

SIMULATION_DATA = './dat/cleaned/simulation.csv'

# Utility function for inv_logit


def ilogit(x):
    return 1/(1+np.exp(-x))

for location in [FIGURE_OUTPUT, CHAIN_OUTPUT, MODEL_OUTPUT, TABLE_OUTPUT]:
    if not os.path.exists(location):
        os.makedirs(location)
        print("created folder : ", location)
    else:
        print(location, "folder already exists.")
./out/figures/ folder already exists.
./out/chains/ folder already exists.
./out/models/ folder already exists.
./out/tables/ folder already exists.
In [5]:
# Clean and save data.
clean_exp1_df = clean_exp_1(pd.read_csv(EXPERIMENT_1))
clean_exp1_df.to_csv(CLEAN_DATA_EXP1)
In [6]:
# Clean and save data.
clean_exp2_df = clean_exp_2(pd.read_csv(EXPERIMENT_2, skiprows=[1, 2]))
clean_exp2_df.to_csv(CLEAN_DATA_EXP2)

Extract demographic information for experiments 1, 2</h3>

In [7]:
from src.demographics import demographics_exp_1, demographics_exp_2
# Plot demographic figures for SI
plt.figure(figsize=(6, 6))
dem1 = demographics_exp_1(pd.read_csv(EXPERIMENT_1))
plt.savefig(FIGURE_OUTPUT+'Exp1_demographics.pdf', fmt='pdf')
plt.figure(figsize=(6, 6))
dem2 = demographics_exp_2(pd.read_csv(CLEAN_DATA_EXP2))
plt.savefig(FIGURE_OUTPUT+'Exp2_demographics.pdf', fmt='pdf')

Experiment 1 Analysis

Convidence vs. Accuracy</h3>

In [8]:
model_confidence_accuracy_logit = pystan.StanModel(
    file='./src/confidence_accuracy_logit.stan')
INFO:pystan:COMPILING THE C++ CODE FOR MODEL anon_model_3427a9a7c8850d25368b67c8c12231fc NOW.

Prior Predictive Check

In [9]:
# Get the data in the format the model wants to see it.
stan_data_logistic = format_stan_data_logistic(pd.read_csv(CLEAN_DATA_EXP1))
def plot_figure1b_prior(stan_data_logistic):
    x = np.linspace(np.min(stan_data_logistic['x']),np.max(stan_data_logistic['x']),10)
    plt.figure(figsize=(20, 10))
    for kdx in range(10):
        for idx in range(5):
            plot_no = idx+1 + 5 * kdx
            plt.subplot(5,10,plot_no)
            alpha_p = np.random.normal(0,.4, 1)
            beta_p = np.random.normal(0,.4, 1)
            alpha = np.random.normal(alpha_p, .4, 100)
            beta = np.random.normal(beta_p, .4, 100)
            y = np.array([alpha + beta * item for item in  x])
            y = ilogit(y)
            plt.plot(x,y, color='grey')
            #plt.ylim(.2,.8)
            #plt.xlim(50,100)
            
            if plot_no%10 == 1:
                plt.ylabel('Accuracy')
            else:
                plt.yticks([])
            if plot_no >= 40:
                plt.xlabel('Reported \nconfidence')
                plt.xticks([])
            plt.ylim(0,1)

plot_figure1b_prior(stan_data_logistic)
INFO:numexpr.utils:Note: NumExpr detected 24 cores but "NUMEXPR_MAX_THREADS" not set, so enforcing safe limit of 8.

Sample and evaluate MCMC

In [10]:
print('Sampling confidence-accuracy model...')
# Get the data in the format the model wants to see it.
stan_data_logistic = format_stan_data_logistic(pd.read_csv(CLEAN_DATA_EXP1))
# MCMC  Time
samples_logistic = model_confidence_accuracy_logit.sampling(
    data=stan_data_logistic)

# Extract samples
extracted_samples_logistic = samples_logistic.extract(['alpha_p', 'beta_p'])
# Save model and output for easy loading later.
pickle_model(model_confidence_accuracy_logit,
             samples_logistic,
             MODEL_OUTPUT,
             CHAIN_OUTPUT,
             'confidence_accuracy_logit')
# Plot chains
#samples_logistic.plot(pars=['alpha_p', 'beta_p'])
az.plot_trace(samples_logistic,
              var_names=['alpha_p', 'beta_p'])
az.plot_forest(samples_logistic, var_names=[
               'alpha_p', 'beta_p'], credible_interval=.89)
print('...sampling complete')
Sampling confidence-accuracy model...
WARNING:pystan:Maximum (flat) parameter count (1000) exceeded: skipping diagnostic tests for n_eff and Rhat.
To run all diagnostics call pystan.check_hmc_diagnostics(fit)
/home/joseph/github/Collective-wisdom-in-polarized-groups/src/utils.py:13: UserWarning: Pickling fit objects is an experimental feature!
The relevant StanModel instance must be pickled along with this fit object.
When unpickling the StanModel must be unpickled first.
  pickle.dump(samples, output, pickle.HIGHEST_PROTOCOL)
INFO:numba.core.transforms:finding looplift candidates
...sampling complete
In [11]:
print('Saving results...',)

model_confidence_accuracy_logit, samples_logistic = load_model(MODEL_OUTPUT,
                                                               CHAIN_OUTPUT,
                                                               'confidence_accuracy_logit')
Saving results...

Figure and Table output

In [12]:
print("Generating and saving tables and figures for confidence-accuracy model...")
# Save key output as a LaTeX file.
variables = ['alpha_p', 'beta_p']
extracted_samples_logistic['beta_p']
latex_string = make_latex_table(extracted_samples_logistic, variables)
print(latex_string)
save_latex_table(TABLE_OUTPUT, 'confidence_accuracy_logit.tex', latex_string)
Generating and saving tables and figures for confidence-accuracy model...
\begin{tabular}{lrrrrr}
\toprule
variable &      Mean &        sd &      5.5\% &     50.0\% &     94.5\% \\
\midrule
 alpha\_p &  0.169131 &  0.128018 & -0.035485 &  0.165491 &  0.378400 \\
 alpha\_p &  0.341267 &  0.109191 &  0.169068 &  0.340968 &  0.512793 \\
 alpha\_p &  0.081943 &  0.111801 & -0.096242 &  0.080778 &  0.258531 \\
 alpha\_p &  0.108446 &  0.119943 & -0.086023 &  0.109395 &  0.294962 \\
 alpha\_p &  0.321710 &  0.126592 &  0.116315 &  0.321330 &  0.520756 \\
  beta\_p & -0.263564 &  0.151652 & -0.507538 & -0.263721 & -0.021475 \\
  beta\_p &  0.229024 &  0.141910 & -0.000207 &  0.227438 &  0.456970 \\
  beta\_p &  0.496161 &  0.141575 &  0.268777 &  0.497032 &  0.720154 \\
  beta\_p &  0.536749 &  0.148409 &  0.305576 &  0.534167 &  0.778091 \\
  beta\_p &  0.233977 &  0.153643 & -0.007105 &  0.229997 &  0.481634 \\
\bottomrule
\end{tabular}

In [13]:
from src.exp_1_figures import plot_figure1a
exp_1_data = pd.read_csv(CLEAN_DATA_EXP1)
plt.savefig(FIGURE_OUTPUT+'joint_hpdi.pdf', fmt='pdf')
<Figure size 432x288 with 0 Axes>
In [14]:
from src.exp_1_figures import joint_hpdi

plt.figure(figsize=(3,3))
sns.set_context('paper', font_scale=1.5)
joint_hpdi(extracted_samples_logistic)


from src.exp_1_figures import plot_figure1b

plt.figure(figsize=(3,3))
sns.set_context('paper', font_scale=1.5)
plot_figure1b(extracted_samples_logistic, stan_data_logistic)
/home/joseph/github/Collective-wisdom-in-polarized-groups/src/exp_1_figures.py:49: UserWarning: linewidths is ignored by contourf
  CS = plt.contourf(xi, yi, zi,levels = levels,
/home/joseph/github/Collective-wisdom-in-polarized-groups/src/exp_1_figures.py:49: UserWarning: The following kwargs were not used by contour: 'shade'
  CS = plt.contourf(xi, yi, zi,levels = levels,

Accuracy by Political Leaning

In [15]:
az.plot_trace(samples_logistic, var_names=['accuracy_p'])
az.plot_forest(samples_logistic, var_names=['accuracy_p'],
               credible_interval=.89)
INFO:numba.core.transforms:finding looplift candidates
Out[15]:
array([<matplotlib.axes._subplots.AxesSubplot object at 0x7f205c7cf310>],
      dtype=object)
In [16]:
extracted_samples_logistic = samples_logistic.extract(['accuracy_p'])
variables = ['accuracy_p']
extracted_samples_logistic['accuracy_p'] = ilogit(extracted_samples_logistic['accuracy_p'])
latex_string = make_latex_table(extracted_samples_logistic, variables)
print(latex_string)
save_latex_table(TABLE_OUTPUT, 'accuracy.tex', latex_string)
print("Complete")
\begin{tabular}{lrrrrr}
\toprule
   variable &      Mean &        sd &      5.5\% &     50.0\% &     94.5\% \\
\midrule
 accuracy\_p &  0.507396 &  0.021482 &  0.473018 &  0.507217 &  0.542118 \\
 accuracy\_p &  0.610359 &  0.020083 &  0.577742 &  0.610342 &  0.642055 \\
 accuracy\_p &  0.572896 &  0.021228 &  0.538627 &  0.572863 &  0.605948 \\
 accuracy\_p &  0.587509 &  0.020953 &  0.553234 &  0.587727 &  0.620980 \\
 accuracy\_p &  0.605620 &  0.021289 &  0.571606 &  0.605711 &  0.638577 \\
\bottomrule
\end{tabular}

Complete

Agent model of confidence

Prior Predictitive Checks

In [17]:
print("Sampling agent model of confidence...")
#Simulate different political groups
correct = 1 #Change to zero to simulate when incorrect

for p in range(5):
    plt.figure(figsize=(20,4))
    mu_a_pol  = np.random.normal(0,1)
    alpha_a_pol =  np.random.normal(-1,1)
    gamma_a_pol =  np.random.normal(0,1)

    mu_b_pol  =  np.random.normal(0,.5)
    alpha_b_pol =  np.random.normal(0,.5)
    gamma_b_pol =  np.random.normal(0,.5)
    #And Individuals
    for k in range(20):



        mu_a = np.random.normal(mu_a_pol,.5)
        alpha_a = np.random.normal(alpha_a_pol,.5)
        gamma_a = np.random.normal(gamma_a_pol,.5)

        mu_b  = np.random.normal(mu_b_pol,.5)
        alpha_b = np.random.normal(alpha_b_pol,.5)
        gamma_b = np.random.normal(gamma_b_pol,.5)


        mu = np.zeros(100)
        alpha = np.zeros(100)
        gamma = np.zeros(100)
        y_sim = np.zeros(100)


        plt.subplot(2,10,k+1)
        for i in range(100):
            theta = abs(np.random.normal(0,5));
            mu[i] = ilogit(mu_a + mu_b*correct);
            alpha[i] = ilogit(alpha_a + alpha_b*correct);
            gamma[i] = ilogit(gamma_a + gamma_b*correct);

            if np.random.binomial(1, alpha[i])==1:
                if np.random.binomial(1, gamma[i]) == 1:
                    y_sim[i] = 1
                else:
                    y_sim[i] = 0
            else: 
                y_sim[i] = np.random.beta(mu[i]*theta, (1-mu[i])*theta)

        sns.histplot(y_sim,bins=np.linspace(0,1,20))
        plt.xlim(0,1)
    
Sampling agent model of confidence...

Model compiliation and Sampling

In [18]:
belief_model = pystan.StanModel(file='./src/belief_model.stan')
INFO:pystan:COMPILING THE C++ CODE FOR MODEL anon_model_5e5b3cb09920a7f1c37ee7574bf7946a NOW.
In [19]:
exp_1_data =  pd.read_csv(CLEAN_DATA_EXP1)
from src.stan_data_format import format_stan_data_belief
stan_data = format_stan_data_belief(exp_1_data)
belief_samples = belief_model.sampling(data = stan_data)
WARNING:pystan:Maximum (flat) parameter count (1000) exceeded: skipping diagnostic tests for n_eff and Rhat.
To run all diagnostics call pystan.check_hmc_diagnostics(fit)
In [20]:
pickle_model(belief_model, 
             belief_samples, 
             MODEL_OUTPUT, 
             CHAIN_OUTPUT, 
             'belief')
/home/joseph/github/Collective-wisdom-in-polarized-groups/src/utils.py:13: UserWarning: Pickling fit objects is an experimental feature!
The relevant StanModel instance must be pickled along with this fit object.
When unpickling the StanModel must be unpickled first.
  pickle.dump(samples, output, pickle.HIGHEST_PROTOCOL)

Evaluate MCMC

In [21]:
az.plot_trace(belief_samples, 
              var_names=['mu_a_pol', 
                         'alpha_a_pol',
                         'gamma_a_pol',
                        'theta']);
In [22]:
az.plot_trace(belief_samples, 
              var_names=['mu_b_pol', 
                         'alpha_b_pol',
                         'gamma_b_pol']);
In [23]:
print('Generating tables, figures and saving....')
extracted_belief_samples = belief_samples.extract(['mu_b_pol', 
                         'alpha_b_pol',
                         'gamma_b_pol','mu_a_pol', 
                         'alpha_a_pol',
                         'gamma_a_pol','theta'])
variables = ['alpha_a_pol', 'gamma_a_pol', 'mu_a_pol', 
             'alpha_b_pol', 'gamma_b_pol', 'mu_b_pol','theta' ]
# extracted_samples_logistic['accuracy_p'] = ilogit(extracted_samples_logistic['accuracy_p'])
latex_string = make_latex_table(extracted_belief_samples, variables)
print(latex_string)
save_latex_table(TABLE_OUTPUT, 'confidence_agent.tex', latex_string)
Generating tables, figures and saving....
\begin{tabular}{lrrrrr}
\toprule
    variable &      Mean &        sd &      5.5\% &     50.0\% &     94.5\% \\
\midrule
 alpha\_a\_pol & -0.264081 &  0.133258 & -0.478004 & -0.263625 & -0.050478 \\
 alpha\_a\_pol & -0.774339 &  0.142671 & -1.001961 & -0.772424 & -0.547302 \\
 alpha\_a\_pol & -0.595755 &  0.134012 & -0.814262 & -0.591665 & -0.382842 \\
 alpha\_a\_pol & -0.768281 &  0.140839 & -1.006619 & -0.765878 & -0.544186 \\
 alpha\_a\_pol & -0.279876 &  0.143357 & -0.507698 & -0.281509 & -0.051488 \\
 gamma\_a\_pol &  2.784338 &  0.293892 &  2.311580 &  2.779423 &  3.263590 \\
 gamma\_a\_pol &  2.713587 &  0.323542 &  2.222798 &  2.703476 &  3.237373 \\
 gamma\_a\_pol &  1.743062 &  0.242439 &  1.353629 &  1.741160 &  2.133307 \\
 gamma\_a\_pol &  2.321245 &  0.315833 &  1.845398 &  2.308828 &  2.843116 \\
 gamma\_a\_pol &  2.560619 &  0.286111 &  2.115986 &  2.553327 &  3.022444 \\
    mu\_a\_pol &  1.031822 &  0.097611 &  0.877555 &  1.030596 &  1.190200 \\
    mu\_a\_pol &  0.562285 &  0.089964 &  0.418823 &  0.560096 &  0.706852 \\
    mu\_a\_pol &  0.558232 &  0.088994 &  0.415035 &  0.558580 &  0.700308 \\
    mu\_a\_pol &  0.572498 &  0.089750 &  0.427107 &  0.573600 &  0.713389 \\
    mu\_a\_pol &  0.950641 &  0.101601 &  0.785779 &  0.951706 &  1.112813 \\
 alpha\_b\_pol &  0.342700 &  0.181885 &  0.055074 &  0.344477 &  0.623525 \\
 alpha\_b\_pol &  0.483889 &  0.174638 &  0.212202 &  0.484987 &  0.765070 \\
 alpha\_b\_pol &  0.611270 &  0.168575 &  0.352015 &  0.606871 &  0.883842 \\
 alpha\_b\_pol &  0.551582 &  0.179034 &  0.268417 &  0.551268 &  0.846547 \\
 alpha\_b\_pol &  0.524759 &  0.178011 &  0.242753 &  0.527436 &  0.806652 \\
 gamma\_b\_pol & -0.113979 &  0.329191 & -0.634306 & -0.121440 &  0.415262 \\
 gamma\_b\_pol &  0.051354 &  0.335849 & -0.490410 &  0.052742 &  0.591270 \\
 gamma\_b\_pol &  0.894119 &  0.292533 &  0.435858 &  0.890463 &  1.376850 \\
 gamma\_b\_pol &  0.080644 &  0.337691 & -0.473892 &  0.098033 &  0.608348 \\
 gamma\_b\_pol &  0.333093 &  0.329504 & -0.193162 &  0.343395 &  0.835819 \\
    mu\_b\_pol & -0.146143 &  0.130515 & -0.359980 & -0.141988 &  0.056353 \\
    mu\_b\_pol &  0.089573 &  0.106695 & -0.082314 &  0.089640 &  0.258947 \\
    mu\_b\_pol &  0.036632 &  0.113772 & -0.137575 &  0.031769 &  0.220310 \\
    mu\_b\_pol &  0.252123 &  0.114063 &  0.073708 &  0.248910 &  0.439276 \\
    mu\_b\_pol & -0.219162 &  0.123868 & -0.416972 & -0.219340 & -0.016385 \\
       theta &  5.717435 &  0.471900 &  4.975931 &  5.708294 &  6.470775 \\
       theta &  4.946908 &  0.334343 &  4.429132 &  4.932626 &  5.507176 \\
       theta &  4.476704 &  0.311388 &  3.998529 &  4.472898 &  4.990173 \\
       theta &  5.336441 &  0.401907 &  4.717573 &  5.326895 &  5.990801 \\
       theta &  4.613106 &  0.366645 &  4.051664 &  4.599046 &  5.228897 \\
\bottomrule
\end{tabular}

Posterior Predictive

In [24]:
#Some of those chains look a bit autocorrelated but converged, so let's look at the ppd. 

belief_model, belief_samples = load_model(MODEL_OUTPUT,
                                           CHAIN_OUTPUT, 
                                        'belief')

belief_samps = belief_samples.extract()
In [25]:
from src.posterior_predictive import plot_predicted_vs_observed_belief_model
#plt.savefig('../Graphs/PredictedVsObserved.pdf', fmt='.pdf',dpi=1500)
exp_1_data =  pd.read_csv(CLEAN_DATA_EXP1)

sns.set_context('paper',font_scale=1.5)
plt.figure(figsize=(3,3))
plot_predicted_vs_observed_belief_model(belief_samps, exp_1_data);
plt.tight_layout()
plt.savefig(FIGURE_OUTPUT+'PredctedVsObservedBelief.pdf', fmt='pdf')
/home/joseph/github/Collective-wisdom-in-polarized-groups/src/posterior_predictive.py:11: SettingWithCopyWarning: 
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  false_data['yhat'] = belief_samps['yhat'][-1,:]
/home/joseph/github/Collective-wisdom-in-polarized-groups/src/posterior_predictive.py:12: SettingWithCopyWarning: 
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  false_data['mean_yhat'] = np.mean(false_data['yhat'],axis=0)
In [26]:
from src.posterior_predictive import plot_belief_model_distributions

exp_1_data =  pd.read_csv(CLEAN_DATA_EXP1)
sns.set_context()
plt.figure(figsize=(15,6))
plot_belief_model_distributions(exp_1_data,belief_samps)
plt.savefig(FIGURE_OUTPUT+'PosteriorPredictiveConfidenceDistributions.pdf', fmt='pdf')
/home/joseph/github/Collective-wisdom-in-polarized-groups/src/posterior_predictive.py:56: SettingWithCopyWarning: 
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  false_data['yhat'] = belief_samps['yhat'][-1,:]
/home/joseph/github/Collective-wisdom-in-polarized-groups/src/posterior_predictive.py:57: SettingWithCopyWarning: 
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  false_data['mean_yhat'] = np.mean(false_data['yhat'],axis=0)

Agent model of social influence

Model for Initially Correct Participants

Compile and Sample

In [27]:
print("Sampling agent model of social influence")
plt.figure(figsize=(10, 4))
for idx in range(20):
    plt.subplot(4,5,idx+1)
    alpha_p = np.random.normal(0,2);
    b_conf_p = np.random.normal(0,2);
    b_socConf_p = np.random.normal(0,2);

    alpha = np.random.normal(alpha_p, 1, 100);
    b_conf = np.random.normal(b_conf_p, 1, 100);

    x = np.linspace(-.5,.5, 100)
    y_sim = np.array([alpha + b_conf * item for item in x])
    plt.plot(x,ilogit(y_sim),color='grey',alpha=.5)
    plt.ylim(0,1)
Sampling agent model of social influence
In [28]:
switch_model = pystan.StanModel(file='./src/switch_model.stan')
INFO:pystan:COMPILING THE C++ CODE FOR MODEL anon_model_68dd4818b050fd456b35236fb514ed4e NOW.
In [29]:
from src.stan_data_format import format_stan_data_switch
exp_1_data =  pd.read_csv(CLEAN_DATA_EXP1)


stan_model_data_correct, _= format_stan_data_switch(exp_1_data,correct=True)

switch_samples_correct = switch_model.sampling(data=stan_model_data_correct)
WARNING:pystan:Maximum (flat) parameter count (1000) exceeded: skipping diagnostic tests for n_eff and Rhat.
To run all diagnostics call pystan.check_hmc_diagnostics(fit)

Evaluate MCMC

In [30]:
import arviz as av
#Extract samples
extracted_switch_samples_correct = switch_samples_correct.extract(['alpha_p', 
                                            'b_conf_p', 
                                            'b_socConf_p'])


#Save model and output for easy loading later. 
pickle_model(switch_model, 
             switch_samples_correct, 
             MODEL_OUTPUT, 
             CHAIN_OUTPUT, 
             'switch_samples_correct');

#Plot chains
av.plot_trace(switch_samples_correct, var_names=['alpha_p', 
                                        'b_conf_p', 
                                            'b_socConf_p']);
/home/joseph/github/Collective-wisdom-in-polarized-groups/src/utils.py:13: UserWarning: Pickling fit objects is an experimental feature!
The relevant StanModel instance must be pickled along with this fit object.
When unpickling the StanModel must be unpickled first.
  pickle.dump(samples, output, pickle.HIGHEST_PROTOCOL)

Plot and Table

In [31]:
print('Plotting Social influence figures, making tables and saving...')
from src.exp_1_figures import plot_fig1cd

stan_model_data_correct, df= format_stan_data_switch(exp_1_data,correct=True)

sns.set_context('paper', font_scale=1.5)
plt.figure(figsize=(3,3))
plot_fig1cd(stan_model_data_correct, df, extracted_switch_samples_correct)

plt.title('Initially \ncorrect')
Plotting Social influence figures, making tables and saving...
Out[31]:
Text(0.5, 1.0, 'Initially \ncorrect')
In [32]:
#Make table 
variables = ['alpha_p','b_conf_p','b_socConf_p']
latex_string = make_latex_table(extracted_switch_samples_correct, variables)
print(latex_string)
save_latex_table(TABLE_OUTPUT, 'extracted_switch_samples_correct.tex', latex_string)
\begin{tabular}{lrrrrr}
\toprule
    variable &      Mean &        sd &      5.5\% &     50.0\% &     94.5\% \\
\midrule
     alpha\_p & -1.066351 &  0.253077 & -1.468256 & -1.067909 & -0.656205 \\
     alpha\_p & -1.050508 &  0.201327 & -1.377506 & -1.047609 & -0.735239 \\
     alpha\_p & -0.951586 &  0.215089 & -1.297785 & -0.955560 & -0.607672 \\
     alpha\_p & -1.092943 &  0.230167 & -1.468781 & -1.092474 & -0.722918 \\
     alpha\_p & -0.946589 &  0.230364 & -1.311489 & -0.948824 & -0.580224 \\
    b\_conf\_p & -1.501010 &  0.593769 & -2.454571 & -1.495101 & -0.570984 \\
    b\_conf\_p & -2.076729 &  0.499776 & -2.878219 & -2.070834 & -1.270523 \\
    b\_conf\_p & -2.614019 &  0.522633 & -3.428351 & -2.621871 & -1.782536 \\
    b\_conf\_p & -2.725586 &  0.527834 & -3.558538 & -2.727251 & -1.859608 \\
    b\_conf\_p & -1.907995 &  0.528501 & -2.764271 & -1.912629 & -1.059875 \\
 b\_socConf\_p &  0.845376 &  0.534874 &  0.029864 &  0.834108 &  1.712866 \\
 b\_socConf\_p &  2.578333 &  0.482460 &  1.804559 &  2.568880 &  3.344650 \\
 b\_socConf\_p &  0.973115 &  0.523546 &  0.169887 &  0.962106 &  1.800529 \\
 b\_socConf\_p &  1.484194 &  0.550730 &  0.618532 &  1.486819 &  2.403751 \\
 b\_socConf\_p &  1.605821 &  0.504753 &  0.802355 &  1.611477 &  2.386906 \\
\bottomrule
\end{tabular}

Model for Initially Incorrect particpants

Compile and Sample

In [33]:
from src.stan_data_format import format_stan_data_switch
exp_1_data =  pd.read_csv(CLEAN_DATA_EXP1)


stan_model_data_incorrect, df = format_stan_data_switch(exp_1_data,correct=False)

switch_samples_incorrect = switch_model.sampling(data=stan_model_data_incorrect)
WARNING:pystan:Maximum (flat) parameter count (1000) exceeded: skipping diagnostic tests for n_eff and Rhat.
To run all diagnostics call pystan.check_hmc_diagnostics(fit)

Evaluate MCMC

In [34]:
import arviz as av
#Extract samples
extracted_switch_samples_incorrect = switch_samples_incorrect.extract(['alpha_p', 
                                            'b_conf_p', 
                                            'b_socConf_p'])


#Save model and output for easy loading later. 
pickle_model(switch_model, 
             switch_samples_incorrect, 
             MODEL_OUTPUT, 
             CHAIN_OUTPUT, 
             'switch_samples_incorrect');
#Plot chains
av.plot_trace(switch_samples_incorrect, var_names=['alpha_p', 
                                        'b_conf_p', 
                                            'b_socConf_p']);
/home/joseph/github/Collective-wisdom-in-polarized-groups/src/utils.py:13: UserWarning: Pickling fit objects is an experimental feature!
The relevant StanModel instance must be pickled along with this fit object.
When unpickling the StanModel must be unpickled first.
  pickle.dump(samples, output, pickle.HIGHEST_PROTOCOL)

Plot and Table

In [35]:
from src.exp_1_figures import plot_fig1cd

stan_model_data_incorrect, df= format_stan_data_switch(exp_1_data,correct=False)

sns.set_context('paper', font_scale=1.5)
plt.figure(figsize=(3,3))
plot_fig1cd(stan_model_data_incorrect, df, extracted_switch_samples_incorrect, correct=False)
plt.title('Initially \nincorrect')
Out[35]:
Text(0.5, 1.0, 'Initially \nincorrect')
In [36]:
#Make table
from src.utils import make_latex_table
variables = ['alpha_p','b_conf_p','b_socConf_p']
latex_string = make_latex_table(extracted_switch_samples_incorrect, variables)
print(latex_string)
save_latex_table(TABLE_OUTPUT, 'extracted_switch_samples_incorrect.tex', latex_string)
\begin{tabular}{lrrrrr}
\toprule
    variable &      Mean &        sd &      5.5\% &     50.0\% &     94.5\% \\
\midrule
     alpha\_p & -0.855998 &  0.278350 & -1.303842 & -0.855016 & -0.405019 \\
     alpha\_p & -0.441248 &  0.228006 & -0.810497 & -0.434930 & -0.084319 \\
     alpha\_p & -1.263911 &  0.226112 & -1.641636 & -1.262576 & -0.903561 \\
     alpha\_p & -1.352956 &  0.254128 & -1.775874 & -1.346173 & -0.958425 \\
     alpha\_p & -1.389736 &  0.290777 & -1.859496 & -1.385340 & -0.936964 \\
    b\_conf\_p & -2.651010 &  0.651695 & -3.699011 & -2.653899 & -1.609413 \\
    b\_conf\_p & -2.201474 &  0.616594 & -3.144960 & -2.226940 & -1.180842 \\
    b\_conf\_p & -1.177770 &  0.521605 & -2.035952 & -1.167196 & -0.336267 \\
    b\_conf\_p & -1.202311 &  0.654260 & -2.219577 & -1.224018 & -0.103899 \\
    b\_conf\_p & -1.106314 &  0.712747 & -2.200407 & -1.123573 &  0.040526 \\
 b\_socConf\_p & -0.100315 &  0.560212 & -0.996914 & -0.107445 &  0.782968 \\
 b\_socConf\_p & -2.740465 &  0.561172 & -3.670447 & -2.737313 & -1.845855 \\
 b\_socConf\_p & -3.017315 &  0.619269 & -3.985174 & -3.002707 & -2.066069 \\
 b\_socConf\_p & -2.357475 &  0.602081 & -3.347003 & -2.346578 & -1.392495 \\
 b\_socConf\_p & -2.714679 &  0.670076 & -3.820650 & -2.698227 & -1.662605 \\
\bottomrule
\end{tabular}

Posterior Preditive Checks

In [37]:
from src.exp_1_figures import plot_switch_predicted_acuracy
plt.figure(figsize=(8,4))
plt.subplot(121)
stan_model_data_incorrect, df= format_stan_data_switch(exp_1_data,correct=True)
plot_switch_predicted_acuracy(df, switch_samples_correct, correct=True)
plt.title('Initially \ncorrect')
ax=plt.gca()

ax.text(-0.1, 1.1, 'A', transform=ax.transAxes, 
        size=20, weight='bold')
plt.subplot(122)
stan_model_data_incorrect, df= format_stan_data_switch(exp_1_data,correct=False)
plot_switch_predicted_acuracy(df, switch_samples_incorrect, correct=False)
plt.title('Initially \nincorrect')
ax=plt.gca()

ax.text(-0.1, 1.1, 'B', transform=ax.transAxes, 
        size=20, weight='bold')
plt.tight_layout()
plt.savefig(FIGURE_OUTPUT+'PredctedVsObservedSwitch.pdf', fmt='pdf')

Figure 1

In [38]:
print("Generating figure 1")
from src.exp_1_figures import plot_figure1a
from src.exp_1_figures import plot_figure1b

sns.set_context('paper', font_scale=1.5)

plt.figure(figsize=(7,7))
plt.subplot(221)
exp_1_data = pd.read_csv(CLEAN_DATA_EXP1)
plot_figure1a(exp_1_data[exp_1_data
                         .answer==True],exp_1_data[exp_1_data.answer==False])
ax=plt.gca()
ax.text(-0.1, 1.1, 'A', transform=ax.transAxes, 
        size=20, weight='bold')
plt.tight_layout()
Generating figure 1
/home/joseph/anaconda3/lib/python3.8/site-packages/seaborn/distributions.py:2619: FutureWarning: `distplot` is a deprecated function and will be removed in a future version. Please adapt your code to use either `displot` (a figure-level function with similar flexibility) or `histplot` (an axes-level function for histograms).
  warnings.warn(msg, FutureWarning)
/home/joseph/anaconda3/lib/python3.8/site-packages/seaborn/distributions.py:2619: FutureWarning: `distplot` is a deprecated function and will be removed in a future version. Please adapt your code to use either `displot` (a figure-level function with similar flexibility) or `histplot` (an axes-level function for histograms).
  warnings.warn(msg, FutureWarning)
In [39]:
from src.exp_1_figures import plot_figure1a
from src.exp_1_figures import plot_figure1b

sns.set_context('paper', font_scale=1.5)

plt.figure(figsize=(7,7))
plt.subplot(221)
exp_1_data = pd.read_csv(CLEAN_DATA_EXP1)
plot_figure1a(exp_1_data[exp_1_data
                         .answer==True],exp_1_data[exp_1_data.answer==False])
ax=plt.gca()
ax.text(-0.1, 1.1, 'A', transform=ax.transAxes, 
        size=20, weight='bold')
plt.tight_layout()


plt.subplot(222)
sns.set_context('paper', font_scale=1.4)
model_confidence_accuracy_logit, samples_logistic = load_model(MODEL_OUTPUT,
                                                               CHAIN_OUTPUT, 
                                                            'confidence_accuracy_logit')
#Extract samples
extracted_samples_logistic = samples_logistic.extract(['alpha_p', 'beta_p'])
#Get the data in the format the model wants to see it. 
stan_data_logistic = format_stan_data_logistic(pd.read_csv(CLEAN_DATA_EXP1))
plot_figure1b(extracted_samples_logistic, stan_data_logistic)
ax=plt.gca()
ax.text(-0.1, 1.1, 'B', transform=ax.transAxes, 
        size=20, weight='bold')
plt.tight_layout()

plt.subplot(223)
exp_1_data =  pd.read_csv(CLEAN_DATA_EXP1)
stan_model_data_incorrect, df = format_stan_data_switch(exp_1_data,correct=True)
plot_fig1cd(stan_model_data_correct, df, extracted_switch_samples_correct)
plt.title('Initially \ncorrect')
ax=plt.gca()
ax.text(-0.1, 1.1, 'C', transform=ax.transAxes, 
        size=20, weight='bold')
plt.tight_layout()

plt.subplot(224)
exp_1_data =  pd.read_csv(CLEAN_DATA_EXP1)
stan_model_data_incorrect, df = format_stan_data_switch(exp_1_data,correct=False)
plot_fig1cd(stan_model_data_incorrect, df, extracted_switch_samples_incorrect,correct=False)
plt.title('Initially \nincorrect')
ax=plt.gca()
ax.text(-0.1, 1.1, 'D', transform=ax.transAxes, 
        size=20, weight='bold')
plt.tight_layout()

plt.savefig(FIGURE_OUTPUT+'Figure1.pdf', fmt='pdf')
/home/joseph/anaconda3/lib/python3.8/site-packages/seaborn/distributions.py:2619: FutureWarning: `distplot` is a deprecated function and will be removed in a future version. Please adapt your code to use either `displot` (a figure-level function with similar flexibility) or `histplot` (an axes-level function for histograms).
  warnings.warn(msg, FutureWarning)
/home/joseph/anaconda3/lib/python3.8/site-packages/seaborn/distributions.py:2619: FutureWarning: `distplot` is a deprecated function and will be removed in a future version. Please adapt your code to use either `displot` (a figure-level function with similar flexibility) or `histplot` (an axes-level function for histograms).
  warnings.warn(msg, FutureWarning)

Simulation Study

Load Models

In [40]:
switch_samples_correct_model, switch_samples_correct = load_model(MODEL_OUTPUT,
                                                                  CHAIN_OUTPUT,
                                                                  'switch_samples_correct')
extracted_switch_samples_correct = switch_samples_correct.extract(['alpha_p',
                                                                   'b_conf_p',
                                                                   'b_socConf_p'])


switch_samples_incorrect_model, switch_samples_incorrect = load_model(MODEL_OUTPUT,
                                                                      CHAIN_OUTPUT,
                                                                      'switch_samples_incorrect')
extracted_switch_samples_incorrect = switch_samples_incorrect.extract(['alpha_p',
                                                                       'b_conf_p',
                                                                       'b_socConf_p'])


belief_model, belief_samples = load_model(MODEL_OUTPUT,
                                          CHAIN_OUTPUT,
                                          'belief')

extracted_belief_samples = belief_samples.extract()

Simulate

In [41]:
print('Running simulations... grab a cup of coffee.')
from pandarallel import pandarallel
import itertools as it
from src.simulation_study import run_single
from tqdm.auto import tqdm
from multiprocessing import cpu_count

tqdm.pandas(desc="my bar!")

pandarallel.initialize(nb_workers=cpu_count(), progress_bar=True)


dat_dict ={'proportions':[[75,50,30,50,75]],
             'p':[.5,.75,.98],
             'diff':np.linspace(.40,.6,20),
             'repeat':np.arange(2000),
             'N':[500],
             'lean':np.array(['right','left',False])}

def get_combinations_dataframe(dd):
    allNames = sorted(dd)
    combinations = it.product(*(dd[Name] for Name in allNames))
    dat = pd.DataFrame(list(combinations), columns=allNames)
    return(dat)

df = get_combinations_dataframe(dat_dict)
    
run_row = lambda dat : run_single(dat['p'],
            dat['diff'],
            dat['N'],
            dat['proportions'],
            dat['lean'],
            extracted_belief_samples,
            extracted_switch_samples_incorrect,
            extracted_switch_samples_correct)

simulation_results = df.parallel_apply(run_row, axis=1)
Running simulations... grab a cup of coffee.
INFO: Pandarallel will run on 24 workers.
INFO: Pandarallel will use Memory file system to transfer data between the main process and workers.

Store Data

In [42]:
# df['correct_final'] = np.array(np.hstack(simulation_results))
sim_results = np.vstack(simulation_results)
df['correct_final'] = sim_results[:,0]
df['correct_start'] = sim_results[:,0]
df.to_csv(SIMULATION_DATA)

Load Data

In [43]:
simulation_results = pd.read_csv(SIMULATION_DATA)
simulation_results['total'] = np.ones(simulation_results.shape[0])
simulation_results['correct_majority'] = simulation_results['correct_final'] > .5
simulation_results.head()
simulation_results.shape
Out[43]:
(360000, 11)

Plot Results

In [44]:
from scipy import stats
def get_bionomial_ci(n=100,k=200,res=1000,q = [5.5, 94.5]):
    p_grid = np.linspace(0,1,res)
    likelihood = stats.binom(k, p_grid).pmf(n)
    prior = np.ones(res)
    posterior=likelihood*prior
    posterior = posterior/np.sum(posterior)

    samples = np.random.choice(p_grid, p=posterior,size=res)
    return(np.percentile(samples, q=q))



def plot_simulation_results(temp,N=500): 
    grouped =  temp.groupby(['p', 'lean','diff']).sum().reset_index()
    grouped.head()
    #plt.scatter(grouped['diff'], grouped['correct_majority'])
    samples_per_difficulty = grouped['total'].values[0]

    ci = np.array([get_bionomial_ci(n=item,k=samples_per_difficulty) for item in grouped['correct_majority'].values])
    pal = sns.color_palette("Greens", n_colors=5)
    grouped['lower'] = ci[:,0]
    grouped['upper'] = ci[:,1]
    ps = [.5,.75,.98]
    for idx in range(3):
        temp = grouped[grouped.p==ps[idx]]
        t1 = temp.groupby(['diff']).mean().reset_index()
        plt.plot(t1['diff'].values, 
                 t1['correct_majority'].values/samples_per_difficulty, 
                 color=pal[idx+2])

        plt.fill_between(t1['diff'].values, t1['lower'], t1['upper'],color=pal[idx+2],alpha=.3)
        head_length = .05
    plt.plot(t1['diff'].values, 
             1-stats.binom(N,t1['diff'].values).cdf(N/2),
             ls='--', 
             color='grey',lw=2)
    plt.ylabel('Collective Accuracy') 
    plt.xlabel('Initial proportion correct')

def add_arrow(dat):

    temp1 = dat[dat.p==.98]
    t1 = temp1.groupby(['diff']).mean().reset_index()
    temp2 = dat[dat.p==.5]
    t2 = temp2.groupby(['diff']).mean().reset_index()
    samples_per_difficulty = t2['total'].values[0]

    diff = t1['correct_majority']/samples_per_difficulty - t2['correct_majority']/samples_per_difficulty

    maxdiff = np.argmax(np.abs(diff))
    print(t2['diff'][maxdiff])
    ax = plt.gca()
    print(t1.shape)


    head_length = .05
    ax.arrow(t2['diff'][maxdiff],
             t2['correct_majority'][maxdiff]/samples_per_difficulty, 
             0,
             t1['correct_majority'][maxdiff]/samples_per_difficulty- \
             t2['correct_majority'][maxdiff]/samples_per_difficulty + \
             head_length,
            head_width=0.015, head_length=head_length, fc='grey', ec='grey')
In [45]:
plt.figure(figsize=(6,9))
plt.subplot(311)
fig = plot_simulation_results(simulation_results[simulation_results.lean=='False'])
plt.tight_layout()
plt.subplot(312)
fig = plot_simulation_results(simulation_results[simulation_results.lean=='left'])
plt.tight_layout()
plt.subplot(313)
fig = plot_simulation_results(simulation_results[simulation_results.lean=='right'])
add_arrow(simulation_results[simulation_results.lean=='right'])
plt.tight_layout()
0.5157894736842106
(20, 9)

Experiment 2

Model compilation and sampling

Prior Predictive Simulation

In [46]:
print('Sampling model for Experiment 2')
alpha = np.random.normal(0,.2,1000);

y = np.random.binomial(100, ilogit(alpha));
sns.distplot(y)
plt.figure()
Sampling model for Experiment 2
/home/joseph/anaconda3/lib/python3.8/site-packages/seaborn/distributions.py:2619: FutureWarning: `distplot` is a deprecated function and will be removed in a future version. Please adapt your code to use either `displot` (a figure-level function with similar flexibility) or `histplot` (an axes-level function for histograms).
  warnings.warn(msg, FutureWarning)
Out[46]:
<Figure size 432x288 with 0 Axes>
<Figure size 432x288 with 0 Axes>
In [47]:
def format_stan_data_exp2(exp_2_data):
    false_exp2_data = exp_2_data[exp_2_data.answer==False]
    false_exp2_data['count'] = np.ones(false_exp2_data.shape[0])
    N = false_exp2_data.shape[0]
    p = 1*(false_exp2_data.p_recode.values)
    cond = false_exp2_data['cond_recode'].values+1
    y = 1*(false_exp2_data['social_correct'].values)
    false_exp2_data['cond_recode']
    pd.Categorical(false_exp2_data['cond_recode'])
    grouped_exp2 = false_exp2_data.groupby(['state', 'cond_recode', 'p_recode']).sum().reset_index()
    
    
    state_data = dict(y=grouped_exp2['social_correct'].astype('int').values,
                        N = grouped_exp2.shape[0],
                        count=grouped_exp2['count'].astype('int').values,
                        cond = grouped_exp2['p_recode'].values *3 + \
                           grouped_exp2['cond_recode'].astype('int').values+1)
    return state_data, false_exp2_data
stan_data_exp2, exp2_data = format_stan_data_exp2(pd.read_csv(CLEAN_DATA_EXP2))
  
<ipython-input-47-5f9dd8cbb5e2>:3: SettingWithCopyWarning: 
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  false_exp2_data['count'] = np.ones(false_exp2_data.shape[0])
In [48]:
#from src.stan_data_format import format_stan_data_exp2
stan_data_exp2, exp2_data = format_stan_data_exp2(pd.read_csv(CLEAN_DATA_EXP2))
exp2_model = pystan.StanModel(file='./src/experiment_2_validation.stan')
<ipython-input-47-5f9dd8cbb5e2>:3: SettingWithCopyWarning: 
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  false_exp2_data['count'] = np.ones(false_exp2_data.shape[0])
INFO:pystan:COMPILING THE C++ CODE FOR MODEL anon_model_1440b0a435e2ad1856dabafc7c770426 NOW.
In [49]:
exp2_samples = exp2_model.sampling(data=stan_data_exp2)

Evaluate MCMC</h2

In [50]:
exp2_samples
Out[50]:
Inference for Stan model: anon_model_1440b0a435e2ad1856dabafc7c770426.
4 chains, each with iter=2000; warmup=1000; thin=1; 
post-warmup draws per chain=1000, total post-warmup draws=4000.

           mean se_mean     sd   2.5%    25%    50%    75%  97.5%  n_eff   Rhat
alpha[1]   0.25  1.0e-3   0.07   0.12    0.2   0.25   0.29   0.38   4427    1.0
alpha[2]    0.2  1.0e-3   0.07   0.07   0.15    0.2   0.24   0.33   4297    1.0
alpha[3]   0.35  1.0e-3   0.07   0.22    0.3   0.35    0.4   0.48   4701    1.0
alpha[4]    0.3  1.0e-3   0.07   0.17   0.26    0.3   0.34   0.43   4280    1.0
alpha[5]   0.19  9.4e-4   0.06   0.06   0.15   0.19   0.23   0.32   4627    1.0
alpha[6]   0.11  9.5e-4   0.07  -0.02   0.07   0.11   0.15   0.24   4758    1.0
lp__      -3359    0.04   1.73  -3363  -3360  -3358  -3357  -3356   1882    1.0

Samples were drawn using NUTS at Thu Oct  7 17:21:30 2021.
For each parameter, n_eff is a crude measure of effective sample size,
and Rhat is the potential scale reduction factor on split chains (at 
convergence, Rhat=1).
In [51]:
az.plot_trace(exp2_samples);
In [52]:
print('Plotting and saving figures, table...')
extracted_exp2 = exp2_samples.extract()


def plot_fig2F(exp2_samples):

    pal = sns.diverging_palette(10, 220, sep=80, n=3,l=40,center='light')
    pal2 = sns.diverging_palette(10, 220, sep=80, n=3,l=40,center='dark')
    pal[1] = pal2[1]

    pal_order = [2,1,0]


    for idx in range(3):
        sns.kdeplot(ilogit(extracted_exp2['alpha'][:,3+idx])-ilogit(extracted_exp2['alpha'][:,idx]),
                    shade=True, 
                    color=pal[pal_order[idx]])

    print(np.mean(ilogit(extracted_exp2['alpha'][:,3+idx])-ilogit(extracted_exp2['alpha'][:,idx])))
    print(np.percentile(ilogit(extracted_exp2['alpha'][:,3+idx])-ilogit(extracted_exp2['alpha'][:,idx]), q=[5.5,94.5]))

    plt.xlabel('Impact of homophily\non accuracy')
    plt.ylabel('Density')

plot_fig2F(extracted_exp2)
Plotting and saving figures, table...
-0.05860267383529687
[-0.0952603  -0.02130106]

Coefficient Table

In [53]:
variables = ['logit_alpha']
extracted_exp2['logit_alpha'] = ilogit(extracted_exp2['alpha'])
latex_string = make_latex_table(extracted_exp2, variables)
print(latex_string)
save_latex_table(TABLE_OUTPUT, 'experiment2.tex', latex_string)
\begin{tabular}{lrrrrr}
\toprule
    variable &      Mean &        sd &      5.5\% &     50.0\% &     94.5\% \\
\midrule
 logit\_alpha &  0.561459 &  0.016475 &  0.534718 &  0.561442 &  0.588146 \\
 logit\_alpha &  0.549488 &  0.016290 &  0.523652 &  0.549022 &  0.575667 \\
 logit\_alpha &  0.586324 &  0.016836 &  0.559515 &  0.586554 &  0.613555 \\
 logit\_alpha &  0.574525 &  0.016311 &  0.548344 &  0.574526 &  0.600304 \\
 logit\_alpha &  0.547432 &  0.015885 &  0.522379 &  0.547737 &  0.573089 \\
 logit\_alpha &  0.527721 &  0.016353 &  0.501486 &  0.527460 &  0.554046 \\
\bottomrule
\end{tabular}

Figure 2

In [54]:
from PIL import Image

def make_square(im, min_size=1000, fill_color=(0, 0, 0, 0)):
    x, y = im.size
    size = max(min_size, x, y)
    new_im = Image.new('RGBA', (size, size), fill_color)
    new_im.paste(im, (int((size - x) / 2), int((size - y) / 2)))
    return new_im
In [55]:
simulation_results = pd.read_csv(SIMULATION_DATA)
simulation_results['total'] = np.ones(simulation_results.shape[0])
simulation_results['correct_majority'] = simulation_results['correct_final'] > .5
simulation_results.head()
Out[55]:
Unnamed: 0 N diff lean p proportions repeat correct_final correct_start total correct_majority
0 0 500 0.4 right 0.5 [75, 50, 30, 50, 75] 0 0.430 0.430 1.0 False
1 1 500 0.4 right 0.5 [75, 50, 30, 50, 75] 1 0.406 0.406 1.0 False
2 2 500 0.4 right 0.5 [75, 50, 30, 50, 75] 2 0.410 0.410 1.0 False
3 3 500 0.4 right 0.5 [75, 50, 30, 50, 75] 3 0.404 0.404 1.0 False
4 4 500 0.4 right 0.5 [75, 50, 30, 50, 75] 4 0.380 0.380 1.0 False
In [56]:
from src.exp_2_figures import plot_fig2E, plot_fig2F

sns.set_context('paper', font_scale=1.3)
plt.subplots(3,2,figsize=(7,10.5))


plt.subplot(322)
fig = plot_simulation_results(simulation_results[simulation_results.lean=='False'])
ax = plt.gca()
ax.text(-0.1, 1.1, 'B', transform=ax.transAxes, 
        size=20, weight='bold')


plt.subplot(321)
test_image = Image.open('./dat/network_img.png')
new_image = make_square(test_image)
plt.imshow(new_image)


ax = plt.gca()

ax.axis('off')
ax.text(-0.1, 1.1, 'A', transform=ax.transAxes, 
        size=20, weight='bold')

plt.tight_layout()
plt.subplot(323)
ax = plt.gca()

ax.text(-0.1, 1.1, 'C', transform=ax.transAxes, 
        size=20, weight='bold')
fig = plot_simulation_results(simulation_results[simulation_results.lean=='left'])

plt.tight_layout()
plt.subplot(324)
ax = plt.gca()

ax.text(-0.1, 1.1, 'D', transform=ax.transAxes, 
        size=20, weight='bold')
fig = plot_simulation_results(simulation_results[simulation_results.lean=='right'])
add_arrow(simulation_results[simulation_results.lean=='right'])



plt.subplot(3,2,5)
ax = plt.gca()
ax.text(-0.1, 1.1, 'E', transform=ax.transAxes, 
        size=20, weight='bold')
stan_data_exp2, exp2_data = format_stan_data_exp2(pd.read_csv(CLEAN_DATA_EXP2))
plot_fig2E(exp2_data, exp2_samples)


plt.subplot(3,2,6)
ax = plt.gca()
ax.text(-0.1, 1.1, 'F', transform=ax.transAxes, 
        size=20, weight='bold')
plot_fig2F(exp2_samples)
plt.savefig(FIGURE_OUTPUT+'Figure2.pdf',dpi=1500, fmt='pdf')
0.5157894736842106
(20, 9)
0.5863236687138076
<ipython-input-47-5f9dd8cbb5e2>:3: SettingWithCopyWarning: 
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  false_exp2_data['count'] = np.ones(false_exp2_data.shape[0])

Conceptual model

Define model and estimate parametrs from data</h3>

In [57]:
print('Computing conceptual model.....')
def fmt(x, pos):
    a, b = '{:.2e}'.format(x).split('e')
    b = int(b)
    return r'${} \times 10^{{{}}}$'.format(a, b)
Computing conceptual model.....
In [58]:
def get_prob(q1, q2, p, a11, g11, a22, g22, a21, g21, a12, g12):
    bb = p*(q1*(1-q1)*(a11-g11)) + (1-p) * (q2*a21*(1-q1)-g21*q1*(1-q2))

    cc = p*(q2*(1-q2)*(a22-g22)) + (1-p) * (q1*a12*(1-q2)-g12*q2*(1-q1))
    return (q1+q2+bb+cc)/2


#1=Libera
a11 = .25
g11 = .2
a12 = .2
g12 = .2

#2=Conservative
a22 = .2
g22 = .2
a21 = .25
g21 = .2



p = np.linspace(0.5, 1.0, 500)
q = np.linspace(0,1,500)

X, Y = np.meshgrid(q, p)
sns.set_context('paper', font_scale=1.5)
sns.set_palette(sns.diverging_palette(100, 280, s=85, l=25, n=20))

Z = get_prob(X, (1-X), Y, a11, g11, a22, g22, a21, g21, a12, g12) - \
    get_prob(X, (1-X), .5, a11, g11, a22, g22, a21, g21, a12, g12) 
fig,ax=plt.subplots(1,1)
cp = ax.contourf(X, Y, Z*100,levels=np.linspace(-1.5, 1.5, 41),cmap=plt.get_cmap('BrBG'))
cb = fig.colorbar(cp,ticks=[-1.5,-1,-.5,0,.5,1.0,1.5]) # Add a colorbar to a plot
cb.ax.set_title(r'$\times 10^{-2}$')

cb.set_label('Change in accuracy')
cb.ax.set_yticklabels(['-1.0','-1.0','-0.5',' 0.0',' 0.5', ' 1.0', ' 1.5']) 
ax.set_xlabel('$\dfrac{q_1}{q_1+q_2}$')
ax.set_ylabel('h')
plt.plot([.5,.5], [.5,1], ls='--', color='k')
plt.tight_layout()

ax.text(0.3, .1, 'Conservative \ncorrect', transform=ax.transAxes, 
        size=12, weight='bold',horizontalalignment='center')
ax.text(0.7, .1, 'Liberal \ncorrect', transform=ax.transAxes, 
        size=12, weight='bold',horizontalalignment='center')
plt.savefig(FIGURE_OUTPUT+'Figure3.pdf',dpi=1500, fmt='pdf')

Experiment 3

Format data

In [59]:
EXPERIMENT_3 = './dat/raw/Experiment3.csv'
data, melted  = clean_exp3(pd.read_csv(EXPERIMENT_3))
melted.head()
Out[59]:
conf answer question correct correct_conf incorrect_conf difficulty pol_recode DEM_1 DEM_2 ... BSS WSS EIS HBS NUMS RIS CintID edu_recode id_recode question_code
2 0.22 True Russia_False False 74.44898 82.278146 0.393574 3 25-34 Liberal ... 1.000000 3 1.4 4 3 2.0 28bed585-e303-9564-5492-e39545a3d55b 3 73 23
3 0.88 True Russia_False False 74.44898 82.278146 0.393574 0 25-34 Very Conservative ... 2.333333 0 2.4 2 0 1.6 11587f4b-61e3-301d-b25c-4973334bd79d 4 36 23
6 0.28 False Russia_False True 74.44898 82.278146 0.393574 2 18-24 Moderate ... 0.000000 1 2.0 0 1 2.0 f0319e12-096b-b176-db05-a81a1422bcc6 0 467 23
7 0.92 True Russia_False False 74.44898 82.278146 0.393574 1 55-64 Conservative ... 1.333333 4 2.8 2 3 2.0 811ae5ae-103d-a896-f0b9-d331c393a673 2 254 23
9 0.60 True Russia_False False 74.44898 82.278146 0.393574 4 18-24 Very Liberal ... 1.333333 4 0.8 2 3 1.2 7a542be2-4bf1-2fd3-bef5-1d7f0785fc36 2 235 23

5 rows × 22 columns

In [60]:
from src.exp_3_figures import plot_exp3_demographics
plot_exp3_demographics(data)
plt.savefig('./out/figures/Experiment3Demographics.pdf',transparent=True)

Load model

In [61]:
print('Sampling experiment 3 model...')
temp_model =  pystan.StanModel(file='./src/exp3_cchbm.stan')
INFO:pystan:COMPILING THE C++ CODE FOR MODEL anon_model_546428ea88b1ceed52bd3eafb395e0d1 NOW.
Sampling experiment 3 model...
In [62]:
from src.stan_data_format import format_stan_data_exp3
exp3_stan_df = format_stan_data_exp3(melted)

Prior Predictive simulation

In [63]:
prior_samples = temp_model.sampling(data=exp3_stan_df, algorithm='Fixed_param')
WARNING:pystan:`warmup=0` forced with `algorithm="Fixed_param"`.
In [64]:
#Fit MLE estimates for regressions from the prior predictive. 
#It's a bit loose, but not out of line with what we saw in exp1, and we want 
#a lot of relationship flexibilty for questions and subjects. seems reasonable. 
from sklearn.linear_model import LogisticRegression
for idx in range(100):
    temp = melted.copy()
    chain = np.random.choice(4000)
    y_prior = prior_samples['y_prior'][chain,:]
    temp['y_prior'] = y_prior

    for idx in range(5):
        t = temp[temp['pol_recode']==idx]
        x = t['conf'].values

        clf = LogisticRegression(random_state=0).fit(x.reshape(-1, 1), t['y_prior'])

        xtemp = np.linspace(.5,1,100).reshape(-1, 1)
        ytemp = clf.predict_proba(xtemp)
        plt.plot(xtemp,ytemp,alpha=.1)
plt.title('Prior Predictive')
plt.xlabel("Confidence")
plt.ylabel('Accuracy')
plt.ylim(0,1)
plt.xlim(.5,1)
Out[64]:
(0.5, 1)

Sample model with political leaning for individual effects

In [65]:
exp3_model = pystan.StanModel(
    file='./src/exp3_cchbm.stan')
INFO:pystan:COMPILING THE C++ CODE FOR MODEL anon_model_546428ea88b1ceed52bd3eafb395e0d1 NOW.
In [66]:
from src.stan_data_format import format_stan_data_exp3
exp3_stan_df = format_stan_data_exp3(melted)
In [67]:
cchbml_samples = exp3_model.sampling(data=exp3_stan_df,)
WARNING:pystan:Maximum (flat) parameter count (1000) exceeded: skipping diagnostic tests for n_eff and Rhat.
To run all diagnostics call pystan.check_hmc_diagnostics(fit)
In [68]:
from src.exp_3_figures import plot_posterior_predictive_exp3
plot_posterior_predictive_exp3(melted, cchbml_samples)
plt.xlabel('Predicted accuracy')
plt.ylabel('Observed acccuracy')
plt.tight_layout()
plt.savefig('./out/figures/posterior_predictive_exp3.pdf')
/home/joseph/anaconda3/lib/python3.8/site-packages/numpy/core/fromnumeric.py:3334: RuntimeWarning: Mean of empty slice.
  return _methods._mean(a, axis=axis, dtype=dtype,
/home/joseph/anaconda3/lib/python3.8/site-packages/numpy/core/_methods.py:161: RuntimeWarning: invalid value encountered in double_scalars
  ret = ret.dtype.type(ret / rcount)
In [69]:
az.summary(cchbml_samples, var_names=['alpha','beta','beta_q','gamma']).sort_values('r_hat',ascending=False)
Out[69]:
mean sd hpd_3% hpd_97% mcse_mean mcse_sd ess_mean ess_sd ess_bulk ess_tail r_hat
alpha 0.343 0.592 -0.754 1.417 0.016 0.011 1439.0 1439.0 1441.0 2248.0 1.0
beta_q[7,2] 0.697 0.621 -0.462 1.892 0.010 0.007 3801.0 3760.0 3862.0 3537.0 1.0
beta_q[7,0] 1.227 0.686 -0.056 2.507 0.011 0.008 3739.0 3486.0 3735.0 3549.0 1.0
beta_q[6,31] 1.561 0.718 0.189 2.888 0.012 0.008 3748.0 3643.0 3754.0 3645.0 1.0
beta_q[6,30] 1.406 0.795 -0.123 2.826 0.016 0.011 2493.0 2493.0 2472.0 3101.0 1.0
... ... ... ... ... ... ... ... ... ... ... ...
beta_q[3,11] 0.278 0.390 -0.427 1.035 0.008 0.005 2519.0 2519.0 2448.0 3347.0 1.0
beta_q[3,10] 0.378 0.378 -0.287 1.080 0.007 0.005 3302.0 3302.0 3180.0 3585.0 1.0
beta_q[3,9] 0.161 0.367 -0.519 0.882 0.007 0.005 2961.0 2961.0 2852.0 3327.0 1.0
beta_q[3,8] 0.323 0.379 -0.345 1.043 0.007 0.005 2712.0 2712.0 2550.0 3569.0 1.0
gamma[1,4] 0.143 0.257 -0.348 0.620 0.007 0.005 1274.0 1274.0 1274.0 1899.0 1.0

332 rows × 11 columns

In [70]:
az.summary(cchbml_samples,var_names=['beta_q','beta_s','gamma']).sort_values('r_hat',ascending=False).head()
Out[70]:
mean sd hpd_3% hpd_97% mcse_mean mcse_sd ess_mean ess_sd ess_bulk ess_tail r_hat
beta_s[0,168] 0.202 0.509 -0.779 1.127 0.011 0.008 1977.0 1977.0 1978.0 2875.0 1.01
beta_q[0,0] 0.563 0.679 -0.690 1.872 0.010 0.009 4501.0 3028.0 4527.0 3648.0 1.00
beta_s[1,63] 0.219 0.263 -0.260 0.741 0.008 0.005 1227.0 1227.0 1228.0 2020.0 1.00
beta_s[1,71] 0.207 0.268 -0.319 0.692 0.007 0.005 1282.0 1282.0 1283.0 2078.0 1.00
beta_s[1,70] 0.202 0.266 -0.302 0.703 0.007 0.005 1342.0 1342.0 1342.0 2043.0 1.00
In [71]:
az.plot_forest(cchbml_samples,var_names=['beta_s'])
INFO:numba.core.transforms:finding looplift candidates
Out[71]:
array([<matplotlib.axes._subplots.AxesSubplot object at 0x7f206a5ddd90>],
      dtype=object)
In [72]:
az.plot_forest(cchbml_samples,var_names=['gamma'])
INFO:numba.core.transforms:finding looplift candidates
Out[72]:
array([<matplotlib.axes._subplots.AxesSubplot object at 0x7f1eeeeec9a0>],
      dtype=object)
In [73]:
az.summary(cchbml_samples,var_names=['L_Omega_q']).sort_values('r_hat',ascending=False)
/home/joseph/anaconda3/lib/python3.8/site-packages/arviz/stats/diagnostics.py:590: RuntimeWarning: invalid value encountered in double_scalars
  (between_chain_variance / within_chain_variance + num_samples - 1) / (num_samples)
/home/joseph/anaconda3/lib/python3.8/site-packages/arviz/stats/diagnostics.py:590: RuntimeWarning: invalid value encountered in double_scalars
  (between_chain_variance / within_chain_variance + num_samples - 1) / (num_samples)
/home/joseph/anaconda3/lib/python3.8/site-packages/arviz/stats/diagnostics.py:590: RuntimeWarning: invalid value encountered in double_scalars
  (between_chain_variance / within_chain_variance + num_samples - 1) / (num_samples)
/home/joseph/anaconda3/lib/python3.8/site-packages/arviz/stats/diagnostics.py:590: RuntimeWarning: invalid value encountered in double_scalars
  (between_chain_variance / within_chain_variance + num_samples - 1) / (num_samples)
/home/joseph/anaconda3/lib/python3.8/site-packages/arviz/stats/diagnostics.py:590: RuntimeWarning: invalid value encountered in double_scalars
  (between_chain_variance / within_chain_variance + num_samples - 1) / (num_samples)
/home/joseph/anaconda3/lib/python3.8/site-packages/arviz/stats/diagnostics.py:590: RuntimeWarning: invalid value encountered in double_scalars
  (between_chain_variance / within_chain_variance + num_samples - 1) / (num_samples)
/home/joseph/anaconda3/lib/python3.8/site-packages/arviz/stats/diagnostics.py:590: RuntimeWarning: invalid value encountered in double_scalars
  (between_chain_variance / within_chain_variance + num_samples - 1) / (num_samples)
/home/joseph/anaconda3/lib/python3.8/site-packages/arviz/stats/diagnostics.py:590: RuntimeWarning: invalid value encountered in double_scalars
  (between_chain_variance / within_chain_variance + num_samples - 1) / (num_samples)
/home/joseph/anaconda3/lib/python3.8/site-packages/arviz/stats/diagnostics.py:590: RuntimeWarning: invalid value encountered in double_scalars
  (between_chain_variance / within_chain_variance + num_samples - 1) / (num_samples)
Out[73]:
mean sd hpd_3% hpd_97% mcse_mean mcse_sd ess_mean ess_sd ess_bulk ess_tail r_hat
L_Omega_q[5,2] -0.043 0.224 -0.463 0.371 0.009 0.006 686.0 686.0 688.0 922.0 1.01
L_Omega_q[8,4] 0.118 0.252 -0.338 0.601 0.010 0.007 697.0 697.0 694.0 1154.0 1.01
L_Omega_q[7,3] 0.275 0.228 -0.162 0.673 0.009 0.007 589.0 589.0 598.0 1217.0 1.01
L_Omega_q[7,2] -0.181 0.240 -0.584 0.306 0.009 0.006 683.0 683.0 690.0 827.0 1.01
L_Omega_q[7,1] 0.164 0.228 -0.275 0.582 0.009 0.006 697.0 697.0 697.0 1365.0 1.01
... ... ... ... ... ... ... ... ... ... ... ...
L_Omega_q[6,8] 0.000 0.000 0.000 0.000 0.000 0.000 4000.0 4000.0 4000.0 4000.0 NaN
L_Omega_q[6,9] 0.000 0.000 0.000 0.000 0.000 0.000 4000.0 4000.0 4000.0 4000.0 NaN
L_Omega_q[7,8] 0.000 0.000 0.000 0.000 0.000 0.000 4000.0 4000.0 4000.0 4000.0 NaN
L_Omega_q[7,9] 0.000 0.000 0.000 0.000 0.000 0.000 4000.0 4000.0 4000.0 4000.0 NaN
L_Omega_q[8,9] 0.000 0.000 0.000 0.000 0.000 0.000 4000.0 4000.0 4000.0 4000.0 NaN

100 rows × 11 columns

In [74]:
print("Effect of confidence (VC-VL)")
print(np.mean(cchbml_samples['gamma'][:,1,0] - cchbml_samples['gamma'][:,1,4]))

print("89% CI")
print(np.percentile(cchbml_samples['gamma'][:,1,0] - cchbml_samples['gamma'][:,1,4], q=[5.5,94.5]))
Effect of confidence (VC-VL)
-0.06032751856320561
89% CI
[-0.27708748  0.16206117]
In [75]:
az.summary(cchbml_samples,var_names=['qm']).sort_values('r_hat',ascending=False)
Out[75]:
mean sd hpd_3% hpd_97% mcse_mean mcse_sd ess_mean ess_sd ess_bulk ess_tail r_hat
qm[3,12] -0.898 0.869 -2.558 0.660 0.024 0.017 1334.0 1334.0 1373.0 2209.0 1.01
qm[0,0] 0.683 0.794 -0.817 2.169 0.012 0.010 4320.0 2930.0 4328.0 2797.0 1.00
qm[5,1] 1.503 0.757 0.123 2.963 0.013 0.009 3279.0 3279.0 3327.0 2348.0 1.00
qm[6,26] 0.636 0.866 -1.035 2.226 0.012 0.012 4927.0 2545.0 4890.0 3196.0 1.00
qm[6,25] 0.331 0.837 -1.265 1.905 0.012 0.012 4723.0 2393.0 4720.0 2978.0 1.00
... ... ... ... ... ... ... ... ... ... ... ...
qm[3,9] 0.369 0.922 -1.300 2.166 0.020 0.014 2027.0 2027.0 2036.0 3043.0 1.00
qm[3,8] 0.635 0.905 -1.064 2.262 0.019 0.013 2272.0 2272.0 2299.0 2983.0 1.00
qm[3,7] -0.164 0.799 -1.669 1.268 0.012 0.013 4310.0 1985.0 4296.0 2987.0 1.00
qm[3,6] 0.081 0.816 -1.376 1.679 0.013 0.013 4137.0 2113.0 4125.0 3186.0 1.00
qm[9,31] -0.168 0.879 -1.858 1.452 0.011 0.014 6399.0 1968.0 6438.0 3044.0 1.00

320 rows × 11 columns

In [76]:
print('Plotting figure 3, generating tables, and saving everything....')   
def plot_exp3_incidental(melted, samples):
    pal = sns.diverging_palette(10, 220, sep=80, n=5,l=40,center='light')
    pal2 = sns.diverging_palette(10, 220, sep=80, n=5,l=40,center='dark')
    pal[2] = pal2[2]
    order = melted.groupby(['question_code']).mean()['correct'].argsort()

    q_names = []
    for item in melted.groupby(['question_code']):
        q_names.append(item[1]['question'][0])


    plt.figure(figsize=(6,8))

    plt.subplot(121)
    for yp,idx in enumerate(order):
        for jdx in range(5):
            temp = np.mean(samples['beta_q'][:,jdx+5,idx],axis=0)
            ci = np.percentile(samples['beta_q'][:,jdx+5,idx],q=[5.5,94.5])
            plt.scatter(temp,3*(yp+((jdx-3)*.1)),color=pal[jdx],alpha=.3)
            plt.plot([ci[0], ci[1]], [3*(yp+((jdx-3)*.1)),3*(yp+((jdx-3)*.1))],color=pal[jdx],alpha=.7)

    plt.yticks(np.arange(order.size)*3, np.array(q_names)[order])
    #plt.ylabel('alpha')
    plt.xlabel('Effect of confidence \nby question')
    plt.plot([0,0],[-1,3*np.max(order)+1],ls='--',color='k')
    plt.ylim(-2, 3*np.max(order)+3)

    plt.subplot(122)
    jdx = 2
    for yp,idx in enumerate(order):
        temp1 = samples['beta_q'][:,5,idx]
        temp2 = samples['beta_q'][:,-1,idx]
        ci = np.percentile(temp1-temp2,q=[5.5,94.5])
        plt.scatter(np.mean(temp1-temp2),3*(yp+((jdx-3)*.1)),color='k',alpha=.3)
        plt.plot([ci[0], ci[1]], [3*(yp+((jdx-3)*.1)),3*(yp+((jdx-3)*.1))],color='k',alpha=.7)

    #plt.yticks(np.arange(order.size)*3, np.array(q_names)[order])
    #plt.ylabel('alpha')
    plt.yticks([])
    plt.xlabel('VC-VL effect of \nconfidence')
    plt.plot([0,0],[-1,3*np.max(order)+1],ls='--',color='k')
    plt.ylim(-2, 3*np.max(order)+3)

plot_exp3_incidental(melted, cchbml_samples)
plt.tight_layout()
plt.savefig('./out/figures/figure4.pdf')
Plotting figure 3, generating tables, and saving everything....
In [77]:
q=[5.5,50.0,94.5]
means = np.mean(cchbml_samples['gamma'][:,0,:],axis=0)
qs = np.percentile(cchbml_samples['gamma'][:,0,:],axis=0,q=[5.5,50.0,94.5])
sigma = np.std(cchbml_samples['gamma'][:,0,:],axis=0)
var=['$\\gamma_{0,'+idx+'}$' for idx in ['VC','C','M','L','VL']]

gamma_tx = pd.DataFrame({'Variable':var,
                         'Mean':means,
                          'sd':sigma, 
                         str(q[0]) + '%':qs[0],
                        str(q[1]) + '%':qs[1],
                        str(q[2]) + '%':qs[2]})
q=[5.5,50.0,94.5]
means = np.mean(cchbml_samples['gamma'][:,1,:],axis=0)
qs = np.percentile(cchbml_samples['gamma'][:,1,:],axis=0,q=[5.5,50.0,94.5])
sigma = np.std(cchbml_samples['gamma'][:,1,:],axis=0)
var=['$\gamma_{1,'+idx+'}$' for idx in ['VC','C','M','L','VL']]


gamma_tx2 = pd.DataFrame({'Variable':var,
                         'Mean':means,
                          'sd':sigma, 
                         str(q[0]) + '%':qs[0],
                        str(q[1]) + '%':qs[1],
                        str(q[2]) + '%':qs[2]})

pd.concat([gamma_tx, gamma_tx2]).to_latex('./out/tables/Exp3GammaModel1.tex',index=False,escape=False)

gamma_tx2
Out[77]:
Variable Mean sd 5.5% 50.0% 94.5%
0 $\gamma_{1,VC}$ 0.082277 0.251116 -0.318274 0.081223 0.486800
1 $\gamma_{1,C}$ 0.201947 0.255595 -0.213101 0.203146 0.602260
2 $\gamma_{1,M}$ 0.211177 0.251998 -0.196234 0.212757 0.603250
3 $\gamma_{1,L}$ 0.255923 0.254495 -0.152740 0.255778 0.658228
4 $\gamma_{1,VL}$ 0.142605 0.256967 -0.269117 0.141423 0.553176
In [78]:
q=[5.5,50.0,94.5]
means = np.mean(np.mean(cchbml_samples['beta_q'][:,0:5,:],axis=0),axis=0)
qs = np.percentile(cchbml_samples['beta_q'][0,0:5,:],axis=0,q=[5.5,50.0,94.5])
qs[0].shape
questions = melted.groupby('question').mean().reset_index().sort_values('question_code')['question']


effects = np.mean(np.mean(cchbml_samples['beta_q'][:,5:,:],axis=0),axis=0)
effect_qs = np.percentile(cchbml_samples['beta_q'][0,5:,:],axis=0,q=[5.5,50.0,94.5])



df1 = pd.DataFrame({'Question':questions.values,'$B_\text{q,INTCP}$':means,
     str(q[0]) + '%':qs[0],
    str(q[1]) + '%':qs[1],
    str(q[2]) + '%':qs[2],
    '$B_\text{q,CONF}$':effects,
    str(q[0]) + '% ':effect_qs[0],
    str(q[1]) + '% ':effect_qs[1],
    str(q[2]) + '% ':effect_qs[2]})

df1.to_latex('./out/tables/questions_posterior.tex',index=False)

Analysis of cognitive battery

In [79]:
print('Analyzing cognitive battery....')
exp3_stan_df_cb = format_stan_data_exp3(melted,with_cb=True)
Analyzing cognitive battery....
In [80]:
cbsamples = exp3_model.sampling(data=exp3_stan_df_cb)
WARNING:pystan:Maximum (flat) parameter count (1000) exceeded: skipping diagnostic tests for n_eff and Rhat.
To run all diagnostics call pystan.check_hmc_diagnostics(fit)
In [81]:
az.summary(cbsamples, var_names=['alpha','beta','beta_q','gamma']).sort_values('r_hat',ascending=False)
Out[81]:
mean sd hpd_3% hpd_97% mcse_mean mcse_sd ess_mean ess_sd ess_bulk ess_tail r_hat
alpha 0.004 0.088 -0.156 0.171 0.001 0.001 4075.0 2510.0 4088.0 3275.0 1.0
beta_q[7,3] 2.075 0.687 0.871 3.392 0.011 0.008 3884.0 3881.0 3889.0 3451.0 1.0
beta_q[7,1] 3.025 0.786 1.609 4.500 0.014 0.010 3367.0 3367.0 3368.0 3344.0 1.0
beta_q[7,0] 1.317 0.643 0.176 2.575 0.011 0.008 3637.0 3412.0 3664.0 3435.0 1.0
beta_q[6,31] 1.815 0.699 0.542 3.163 0.011 0.008 3772.0 3498.0 3812.0 3389.0 1.0
... ... ... ... ... ... ... ... ... ... ... ...
beta_q[3,12] -0.469 0.353 -1.178 0.106 0.007 0.005 2792.0 2792.0 2581.0 2555.0 1.0
beta_q[3,11] 0.287 0.385 -0.389 1.037 0.006 0.005 3531.0 3150.0 3467.0 3473.0 1.0
beta_q[3,10] 0.399 0.371 -0.222 1.127 0.007 0.005 2907.0 2907.0 2716.0 3614.0 1.0
beta_q[3,9] 0.191 0.362 -0.467 0.893 0.007 0.005 2832.0 2737.0 2843.0 3066.0 1.0
gamma[1,5] 0.007 0.030 -0.048 0.065 0.000 0.000 6841.0 1845.0 6860.0 3190.0 1.0

334 rows × 11 columns

In [82]:
az.summary(cbsamples,var_names=['beta_q','beta_s','gamma']).sort_values('r_hat',ascending=False).head()
Out[82]:
mean sd hpd_3% hpd_97% mcse_mean mcse_sd ess_mean ess_sd ess_bulk ess_tail r_hat
beta_s[0,116] 0.229 0.186 -0.140 0.586 0.002 0.002 7649.0 3077.0 7720.0 2432.0 1.01
beta_s[0,336] -0.087 0.190 -0.435 0.294 0.002 0.003 8222.0 1793.0 8159.0 2870.0 1.01
beta_s[0,198] 0.054 0.184 -0.297 0.419 0.002 0.003 8536.0 1974.0 8844.0 3088.0 1.01
beta_s[0,128] -0.432 0.182 -0.829 -0.109 0.002 0.002 7014.0 4024.0 7329.0 2775.0 1.01
beta_s[0,386] 0.403 0.185 0.046 0.777 0.002 0.002 8420.0 5270.0 8401.0 2486.0 1.01
In [83]:
az.plot_forest(cbsamples,var_names=['beta_s'])
INFO:numba.core.transforms:finding looplift candidates
Out[83]:
array([<matplotlib.axes._subplots.AxesSubplot object at 0x7f1eedc4b490>],
      dtype=object)
In [84]:
az.plot_forest(cbsamples,var_names=['gamma'])
INFO:numba.core.transforms:finding looplift candidates
Out[84]:
array([<matplotlib.axes._subplots.AxesSubplot object at 0x7f1ebf4b3760>],
      dtype=object)
In [85]:
question_codes = pd.read_csv('./dat/Exp3questions.csv',names=['Code', 'Text'])
question_codes.sort_values('Code').to_latex('./out/tables/exp3Questions.tex',index=False)
In [86]:
q=[5.5,50.0,94.5]
means = np.mean(np.mean(cbsamples['beta_q'][:,0:5,:],axis=0),axis=0)
qs = np.percentile(cbsamples['beta_q'][0,0:5,:],axis=0,q=[5.5,50.0,94.5])
qs[0].shape
questions = melted.groupby('question').mean().reset_index().sort_values('question_code')['question']


effects = np.mean(np.mean(cbsamples['beta_q'][:,5:,:],axis=0),axis=0)
effect_qs = np.percentile(cbsamples['beta_q'][0,5:,:],axis=0,q=[5.5,50.0,94.5])



df1 = pd.DataFrame({'Question':questions.values,'$B_\text{q,INTCP}$':means,
     str(q[0]) + '%':qs[0],
    str(q[1]) + '%':qs[1],
    str(q[2]) + '%':qs[2],
    '$B_\text{q,CONF}$':effects,
    str(q[0]) + '% ':effect_qs[0],
    str(q[1]) + '% ':effect_qs[1],
    str(q[2]) + '% ':effect_qs[2]})
In [87]:
az.summary(cbsamples,var_names=['gamma'])
Out[87]:
mean sd hpd_3% hpd_97% mcse_mean mcse_sd ess_mean ess_sd ess_bulk ess_tail r_hat
gamma[0,0] 0.013 0.034 -0.051 0.080 0.0 0.001 6918.0 2125.0 6920.0 3164.0 1.0
gamma[0,1] 0.175 0.039 0.107 0.249 0.0 0.000 6108.0 5763.0 6111.0 3325.0 1.0
gamma[0,2] 0.049 0.033 -0.015 0.108 0.0 0.000 8067.0 5110.0 8103.0 3146.0 1.0
gamma[0,3] 0.063 0.035 0.000 0.128 0.0 0.000 7132.0 5929.0 7153.0 3255.0 1.0
gamma[0,4] 0.114 0.039 0.043 0.186 0.0 0.000 6357.0 5932.0 6370.0 2849.0 1.0
gamma[0,5] -0.074 0.035 -0.138 -0.006 0.0 0.000 6872.0 5372.0 6846.0 3100.0 1.0
gamma[1,0] 0.015 0.030 -0.042 0.071 0.0 0.000 7483.0 2921.0 7484.0 3323.0 1.0
gamma[1,1] 0.105 0.035 0.040 0.170 0.0 0.000 5394.0 4661.0 5406.0 2989.0 1.0
gamma[1,2] 0.013 0.029 -0.043 0.066 0.0 0.000 7997.0 2396.0 7981.0 3174.0 1.0
gamma[1,3] 0.012 0.032 -0.049 0.069 0.0 0.000 7636.0 2414.0 7636.0 2936.0 1.0
gamma[1,4] 0.088 0.036 0.018 0.151 0.0 0.000 5940.0 5137.0 5950.0 3258.0 1.0
gamma[1,5] 0.007 0.030 -0.048 0.065 0.0 0.000 6841.0 1845.0 6860.0 3190.0 1.0
In [88]:
q=[5.5,50.0,94.5]
means = np.hstack([np.mean(cbsamples['gamma'][:,0,:],axis=0),
                  np.mean(cbsamples['gamma'][:,1,:],axis=0)])
sd = np.hstack([np.std(cbsamples['gamma'][:,0,:],axis=0),
                  np.std(cbsamples['gamma'][:,1,:],axis=0)])

cis = np.hstack([np.percentile(cbsamples['gamma'][:,0,:],axis=0,q=q),
                  np.percentile(cbsamples['gamma'][:,1,:],axis=0,q=q)])
names = ['Bullshit Sensitivity','Wordsums',
         'Need for Cognition','Heuristics and Biases','Numeracy','Faith in Intuition']



df_cb = pd.DataFrame({'Trait':names + names,
                     'Variable':np.hstack([np.repeat('Intcp',6), np.repeat('Conf',6)]),
    'Mean':means,
    'sd':sd,
     str(q[0]) + '%':cis[0],
     str(q[1]) + '%':cis[1],
     str(q[2]) + '%':cis[2]})
df_cb.to_latex('./out/tables/cb.tex',index=False)
df_cb
Out[88]:
Trait Variable Mean sd 5.5% 50.0% 94.5%
0 Bullshit Sensitivity Intcp 0.013253 0.034438 -0.041903 0.013596 0.069525
1 Wordsums Intcp 0.175473 0.038641 0.114550 0.175272 0.237233
2 Need for Cognition Intcp 0.048572 0.033229 -0.004044 0.048205 0.102405
3 Heuristics and Biases Intcp 0.062786 0.034822 0.007255 0.062621 0.117655
4 Numeracy Intcp 0.113968 0.038622 0.051158 0.113890 0.175316
5 Faith in Intuition Intcp -0.074113 0.035320 -0.130837 -0.074278 -0.017626
6 Bullshit Sensitivity Conf 0.014957 0.030491 -0.033807 0.014789 0.063029
7 Wordsums Conf 0.105244 0.035052 0.049587 0.105242 0.160945
8 Need for Cognition Conf 0.013232 0.029249 -0.033893 0.012895 0.060556
9 Heuristics and Biases Conf 0.012451 0.031512 -0.038491 0.012578 0.062819
10 Numeracy Conf 0.088045 0.035694 0.030801 0.087899 0.144706
11 Faith in Intuition Conf 0.006735 0.030314 -0.041885 0.006965 0.055122
In [89]:
az.summary(cbsamples,var_names=['qm']).sort_values('r_hat',ascending=False)
Out[89]:
mean sd hpd_3% hpd_97% mcse_mean mcse_sd ess_mean ess_sd ess_bulk ess_tail r_hat
qm[8,27] 0.498 0.968 -1.358 2.284 0.014 0.016 4717.0 1905.0 4741.0 2411.0 1.01
qm[0,0] 0.851 0.773 -0.624 2.283 0.010 0.009 5732.0 3699.0 5747.0 3335.0 1.00
qm[6,18] -0.403 0.811 -1.970 1.086 0.012 0.011 4840.0 2722.0 4886.0 2818.0 1.00
qm[6,25] 0.451 0.825 -1.136 1.977 0.011 0.011 5511.0 2956.0 5503.0 3181.0 1.00
qm[6,24] 0.331 0.831 -1.079 1.986 0.012 0.012 4714.0 2368.0 4762.0 3075.0 1.00
... ... ... ... ... ... ... ... ... ... ... ...
qm[3,9] 0.457 0.924 -1.275 2.195 0.018 0.013 2640.0 2623.0 2647.0 2766.0 1.00
qm[3,8] 0.682 0.894 -1.036 2.321 0.017 0.012 2672.0 2672.0 2694.0 2649.0 1.00
qm[3,7] -0.096 0.802 -1.591 1.388 0.012 0.012 4820.0 2327.0 4826.0 3544.0 1.00
qm[3,6] 0.114 0.809 -1.427 1.570 0.012 0.011 4527.0 2491.0 4569.0 3026.0 1.00
qm[9,31] -0.176 0.861 -1.726 1.534 0.009 0.014 8666.0 1892.0 8636.0 3272.0 1.00

320 rows × 11 columns

In [90]:
print('All done... easy eh?')
All done... easy eh?
In [ ]:
 
In [ ]: